import os, sys, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.append('..')
from prediction_denoise import prediction
from glob import glob
import librosa
import IPython.display as ipd
from signal_utils import audio_files_to_numpy, numpy_audio_to_matrix_spectrogram, snr_db, psnr, apply_noise
from data_plot import plot_spectrogram, plot_3_spectograms
import matplotlib.pyplot as plt
List the noisy voice recordings that will be denoised.
# Collect the file names (without directories) of the noisy validation recordings.
# glob() does not guarantee any particular order, while later cells select
# recordings by list index, so the names are sorted to keep indexing deterministic.
noisy_voices_list = sorted(
    os.path.basename(voice) for voice in glob('..\\data\\validation\\noisy_voice\\*')
)
print(noisy_voices_list)
['karol_birds.wav', 'karol_klawiatura.wav', 'karol_myszka.wav', 'marcin_klawiatura.wav']
def predict(
    audio_input_prediction,
    audio_output_prediction,
    sr=8000,
    name_model='model_unet',
    path='..\\data\\validation\\',
):
    """Denoise a single recording by delegating to ``prediction``.

    Args:
        audio_input_prediction: File name of the noisy recording. It is wrapped
            in a one-element list before being handed to ``prediction``.
        audio_output_prediction: File name under which the denoised sound is saved.
        sr: Sample rate used to read the audio.
        name_model: Name of the pre-trained model.
        path: Data root. It is concatenated verbatim with the ``noisy_voice``
            and ``save_prediction`` sub-directory names, so it has to end with
            a path separator.

    Returns:
        None. The result of ``prediction`` is not propagated.
    """
    prediction(
        '..\\data\\weights',          # where the pre-trained weights are looked up
        name_model,                   # pre-trained model
        f'{path}noisy_voice',         # directory holding the noisy sound to denoise
        f'{path}save_prediction\\',   # directory where the denoised sound is saved
        [audio_input_prediction],     # noisy file name, passed as a list
        audio_output_prediction,      # name of the denoised file to save
        sr,                           # sample rate to read audio
        1.0,                          # minimum duration of audio files to consider
        8064,                         # frame length
        8064,                         # hop length for sound files
        255,                          # number of points for the FFT (spectrogram)
        63,                           # hop length for the FFT
    )
# Denoise every noisy validation recording; each result is saved under the
# original file name prefixed with "pred_".
for voice in noisy_voices_list:
    denoised_name = f'pred_{voice}'
    predict(audio_input_prediction=voice, audio_output_prediction=denoised_name)
Loaded model from disk 128 (22, 128, 128) (22, 128, 128) 8064 63 Loaded model from disk 128 (22, 128, 128) (22, 128, 128) 8064 63 Loaded model from disk 128 (18, 128, 128) (18, 128, 128) 8064 63 Loaded model from disk 128 (5, 128, 128) (5, 128, 128) 8064 63
# Collect the file names (without directories) of the saved predictions.
# glob() does not guarantee any particular order and the following cells select
# files by list index, so the names are sorted to make that indexing deterministic.
pred_voices_list = sorted(
    os.path.basename(voice) for voice in glob('..\\data\\validation\\save_prediction\\*')
)
print(pred_voices_list)
['pred_karol_birds.wav', 'pred_karol_klawiatura.wav', 'pred_karol_myszka.wav', 'pred_marcin_klawiatura.wav']
# Load the noisy recording at index 3 and the prediction at the same index,
# both read at 8 kHz.
noisy_path = os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[3])
real_noisy, sr = librosa.load(noisy_path, sr=8000)
pred_path = os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[3])
real_pred, sr = librosa.load(pred_path, sr=8000)

# Show a labelled audio player for each of the two signals.
for clip_label, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(clip_label)
    ipd.display(ipd.Audio(clip, rate=sr))
Noisy voice
Predicted voice
# Hop length (in samples) shared by the spectrogram computation and the plots.
hop_length_fft = 63

# Noisy signal: both size arguments are derived from twice the number of whole
# hops that fit into the signal.
# NOTE(review): this call passes numerator + 1 as the third argument while the
# predicted-signal call below passes numerator -- confirm the difference is intended.
numerator = 2 * (real_noisy.shape[0] // hop_length_fft)
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, -1),
    numerator // 2 + 1,
    numerator + 1,
    hop_length_fft,
)

# Predicted (denoised) signal.
numerator = 2 * (real_pred.shape[0] // hop_length_fft)
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, -1),
    numerator // 2 + 1,
    numerator,
    hop_length_fft,
)

# Plot the first (and only) spectrogram of each batch: noisy first, then predicted.
plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Load the noisy recording at index 1 and the prediction at the same index,
# both read at 8 kHz.
noisy_path = os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[1])
real_noisy, sr = librosa.load(noisy_path, sr=8000)
pred_path = os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[1])
real_pred, sr = librosa.load(pred_path, sr=8000)

# Show a labelled audio player for each of the two signals.
for clip_label, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(clip_label)
    ipd.display(ipd.Audio(clip, rate=sr))
Noisy voice
Predicted voice
# Hop length (in samples) shared by the spectrogram computation and the plots.
hop_length_fft = 63

# Noisy signal: both size arguments are derived from twice the number of whole
# hops that fit into the signal.
# NOTE(review): this call passes numerator + 1 as the third argument while the
# predicted-signal call below passes numerator -- confirm the difference is intended.
numerator = 2 * (real_noisy.shape[0] // hop_length_fft)
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, -1),
    numerator // 2 + 1,
    numerator + 1,
    hop_length_fft,
)

# Predicted (denoised) signal.
numerator = 2 * (real_pred.shape[0] // hop_length_fft)
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, -1),
    numerator // 2 + 1,
    numerator,
    hop_length_fft,
)

# Plot the first (and only) spectrogram of each batch: noisy first, then predicted.
plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Load the noisy recording at index 2 and the prediction at the same index,
# both read at 8 kHz.
noisy_path = os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[2])
real_noisy, sr = librosa.load(noisy_path, sr=8000)
pred_path = os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[2])
real_pred, sr = librosa.load(pred_path, sr=8000)

# Show a labelled audio player for each of the two signals.
for clip_label, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(clip_label)
    ipd.display(ipd.Audio(clip, rate=sr))
Noisy voice
Predicted voice
# Hop length (in samples) shared by the spectrogram computation and the plots.
hop_length_fft = 63

# Noisy signal: both size arguments are derived from twice the number of whole
# hops that fit into the signal.
# NOTE(review): this call passes numerator + 1 as the third argument while the
# predicted-signal call below passes numerator -- confirm the difference is intended.
numerator = 2 * (real_noisy.shape[0] // hop_length_fft)
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, -1),
    numerator // 2 + 1,
    numerator + 1,
    hop_length_fft,
)

# Predicted (denoised) signal.
numerator = 2 * (real_pred.shape[0] // hop_length_fft)
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, -1),
    numerator // 2 + 1,
    numerator,
    hop_length_fft,
)

# Plot the first (and only) spectrogram of each batch: noisy first, then predicted.
plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Load the noisy recording at index 0 and the prediction at the same index,
# both read at 8 kHz.
noisy_path = os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[0])
real_noisy, sr = librosa.load(noisy_path, sr=8000)
pred_path = os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[0])
real_pred, sr = librosa.load(pred_path, sr=8000)

# Show a labelled audio player for each of the two signals.
for clip_label, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(clip_label)
    ipd.display(ipd.Audio(clip, rate=sr))
Noisy voice
Predicted voice
# Hop length (in samples) shared by the spectrogram computation and the plots.
hop_length_fft = 63

# Noisy signal: both size arguments are derived from twice the number of whole
# hops that fit into the signal.
# NOTE(review): this call passes numerator + 1 as the third argument while the
# predicted-signal call below passes numerator -- confirm the difference is intended.
numerator = 2 * (real_noisy.shape[0] // hop_length_fft)
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, -1),
    numerator // 2 + 1,
    numerator + 1,
    hop_length_fft,
)

# Predicted (denoised) signal.
numerator = 2 * (real_pred.shape[0] // hop_length_fft)
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, -1),
    numerator // 2 + 1,
    numerator,
    hop_length_fft,
)

# Plot the first (and only) spectrogram of each batch: noisy first, then predicted.
plot_spectrogram(real_noisy_db[0, :, :], sr, hop_length_fft)
plot_spectrogram(real_pred_db[0, :, :], sr, hop_length_fft)
# Collect the file names (without directories) of the clean reference voices.
# glob() does not guarantee any particular order; sorting makes the later
# clean_voices_list[0] lookup deterministic if more files are added.
clean_voices_list = sorted(
    os.path.basename(voice) for voice in glob('..\\data\\snr_test\\clean_voice\\*')
)
print(clean_voices_list)
['marcin_clean.wav']
# Collect the file names (without directories) of the noise recordings.
# glob() does not guarantee any particular order; sorting makes the later
# noises_list[0] lookup deterministic if more files are added.
noises_list = sorted(
    os.path.basename(noise) for noise in glob('..\\data\\snr_test\\noise\\*')
)
print(noises_list)
['keyboard.wav']
Mix the noise into the clean voice at several gain levels to obtain a range of SNRs.
# Gain factors applied to the noise before it is mixed with the clean voice.
multipliers = [0.25, 0.5, 1, 2, 4]

# Clean voice and noise are both read with librosa's default sample rate.
clean_name = clean_voices_list[0]
s, fs = librosa.load(os.path.join('..\\data\\snr_test\\clean_voice', clean_name))
snrs = []
noise_name = noises_list[0]
n, _ = librosa.load(os.path.join('..\\data\\snr_test\\noise', noise_name))

for i, multiplier in enumerate(multipliers):
    # SNR of the clean voice against the scaled noise.
    snrs.append(snr_db(s, n * multiplier))
    # Output name: the clean file name without its last 9 characters, followed
    # by the multiplier index and the noise file name.
    noisy_file_name = f'{clean_name[:-9]}{i}_{noise_name}'
    noised_voice = apply_noise(
        '..\\data\\snr_test\\noisy_voice\\', noisy_file_name, s, n * multiplier, fs
    )
print(snrs)
[9.48, 6.47, 3.46, 0.45, -2.56]
Denoise
# Collect the file names (without directories) of the generated noisy voices.
# glob() does not guarantee any particular order; sorting keeps the files in
# the order of the multiplier index embedded in their names.
noisy_voices_list = sorted(
    os.path.basename(voice) for voice in glob('..\\data\\snr_test\\noisy_voice\\*')
)
print(noisy_voices_list)
['marcin_0_keyboard.wav', 'marcin_1_keyboard.wav', 'marcin_2_keyboard.wav', 'marcin_3_keyboard.wav', 'marcin_4_keyboard.wav']
# Denoise every generated noisy voice of the SNR test set; each result is saved
# under the original file name prefixed with "pred_".
for voice in noisy_voices_list:
    denoised_name = f'pred_{voice}'
    predict(
        audio_input_prediction=voice,
        audio_output_prediction=denoised_name,
        path='..\\data\\snr_test\\',
    )
Loaded model from disk 128 WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001FA87555510> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (4, 128, 128) (4, 128, 128) 8064 63 Loaded model from disk 128 WARNING:tensorflow:6 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001FA87555730> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (4, 128, 128) (4, 128, 128) 8064 63 Loaded model from disk 128 WARNING:tensorflow:7 out of the last 7 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001FA873D7268> triggered tf.function retracing. 
Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (4, 128, 128) (4, 128, 128) 8064 63 Loaded model from disk 128 WARNING:tensorflow:8 out of the last 8 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001FA873552F0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (4, 128, 128) (4, 128, 128) 8064 63 Loaded model from disk 128 WARNING:tensorflow:9 out of the last 9 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001FA87355400> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. 
For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (4, 128, 128) (4, 128, 128) 8064 63
Calculate PSNRs
# Reference signal: the clean recording, read with librosa's default sample rate.
original, fs = librosa.load(os.path.join('..\\data\\snr_test\\clean_voice', clean_voices_list[0]))

# glob() does not guarantee any particular order; the names are sorted so the
# PSNR values below follow the multiplier index embedded in the file names.
predicted_voices_list = sorted(
    os.path.basename(voice) for voice in glob('..\\data\\snr_test\\save_prediction\\*')
)
print(predicted_voices_list)

psnrs = []
for voice in predicted_voices_list:
    pred, fs = librosa.load(os.path.join('..\\data\\snr_test\\save_prediction', voice))
    # The reference is cut to the prediction's length before the comparison.
    psnrs.append(psnr(original[:pred.shape[0]], pred))
print(psnrs)
['pred_marcin_0_keyboard.wav', 'pred_marcin_1_keyboard.wav', 'pred_marcin_2_keyboard.wav', 'pred_marcin_3_keyboard.wav', 'pred_marcin_4_keyboard.wav'] [19.01, 18.58, 18.44, 19.34, 21.1]
# Load the clean, noisy and denoised versions of one utterance and report the
# shape and sample rate of each.
clean_path = os.path.join('..\\data\\snr_test\\clean_voice', 'marcin_clean.wav')
test1, fs = librosa.load(clean_path)
print(test1.shape, fs)

noisy_path = os.path.join('..\\data\\snr_test\\noisy_voice', 'marcin_0_keyboard.wav')
test2, fs = librosa.load(noisy_path)
print(test2.shape, fs)

pred_path = os.path.join('..\\data\\snr_test\\save_prediction', 'pred_marcin_0_keyboard.wav')
test3, fs = librosa.load(pred_path)
print(test3.shape, fs)

# Overlay the clean signal (cut to the prediction's length) and the prediction.
pred_len = test3.shape[0]
plt.plot(test1[:pred_len])
plt.plot(test3)

# snr_db with the arguments in both orders.
p1 = snr_db(test1[:pred_len], test3)
print(p1)
p2 = snr_db(test3, test1[:pred_len])
print(p2)
(96334,) 22050 (96334,) 22050 (88906,) 22050 -1.79 1.79